Street Flooding Complaints (SFC)
Contents
Street Flooding Complaints (SFC)#
Import Libraries#
Built-in Libraries#
import json
import os
External Libraries#
import pyproj
import geopandas as gpd
import pandas as pd
# import geojson as gj
311 Service Requests from 2010 to Present#
About#
Key |
Value |
|---|---|
URL |
https://data.cityofnewyork.us/Social-Services/311-Service-Requests-from-2010-to-Present/erm2-nwe9 |
Description |
All 311 Service Requests from 2010 to present. |
Updated |
2023-02-13 |
Views |
440K+ |
Data Provided by |
311, DoITT |
Category |
|
API Docs |
https://dev.socrata.com/foundry/data.cityofnewyork.us/erm2-nwe9 |
API Endpoints |
|
|
Sewer |
|
Street Flooding (SJ) |
Define Variables#
By default the API returns at most 1,000 rows per request (default `$limit` = 1000).
%%script echo skip
# 311 Service Request endpoints filtered to the "Street Flooding (SJ)"
# descriptor, one constant per response format.
NYC_OPEN_DATA_311_API_JSON = (
    'https://data.cityofnewyork.us/resource/erm2-nwe9.json'
    '?descriptor=Street%20Flooding%20(SJ)'
)
NYC_OPEN_DATA_311_API_GEOJSON = (
    'https://data.cityofnewyork.us/resource/erm2-nwe9.geojson'
    '?descriptor=Street%20Flooding%20(SJ)'
)
NYC_OPEN_DATA_311_API_CSV = (
    'https://data.cityofnewyork.us/resource/erm2-nwe9.csv'
    '?descriptor=Street%20Flooding%20(SJ)'
)
skip
Download 311 Service Complaints for Street Flooding (SJ)#
Define prefix for output file names#
%%script echo skip
# Shared path prefix for the locally saved copies. It ends with a dot so that
# a bare extension such as 'json' can be appended directly.
output_prefix = 'data/street_flood-complaints.'
skip
Save .json data locally#
%%script echo skip
# Read the JSON endpoint into a DataFrame, then write a local copy of it.
street_flooding_jdf = pd.read_json(NYC_OPEN_DATA_311_API_JSON)
street_flooding_jdf.to_json(f'{output_prefix}json')
skip
Save .geojson data locally#
%%script echo skip
# Read the GeoJSON endpoint into a GeoDataFrame, then write a local copy of it.
street_flooding_gdf = gpd.read_file(
    NYC_OPEN_DATA_311_API_GEOJSON,
    driver='GeoJSON',
)
street_flooding_gdf.to_file(f'{output_prefix}geojson')
skip
def get_street_flooding_data(file_type: str = 'geojson') -> None:
    """Download all Street Flooding (SJ) complaints in fixed-size pages.

    Pages are requested one after another from the URL built by
    ``get_api_endpoint``. Every non-empty page is written to its own file,
    named by ``get_output_file_name``, and a progress line is printed. The
    loop stops at the first page that contains no rows.

    Args:
        file_type (str, optional): Extension used in the saved file names.
            The request itself always targets the GeoJSON endpoint.
            Defaults to 'geojson'.
    """
    import os

    limit = 10000  # rows requested per page, and therefore rows per saved file
    output_prefix = 'data/street_flood-complaints'

    # Make sure the target directory exists before the first write; a fresh
    # checkout has no 'data/' folder and the write would otherwise fail.
    os.makedirs(os.path.dirname(output_prefix), exist_ok=True)

    current_file = 0
    while True:
        street_flooding_df = gpd.read_file(get_api_endpoint(limit, current_file), driver='GeoJSON')
        # An empty page means the previous page was the last one.
        if len(street_flooding_df) == 0:
            break
        file_name_output = get_output_file_name(output_prefix, limit, current_file, file_type)
        street_flooding_df.to_file(file_name_output)
        print(f'Save file {current_file + 1}: {file_name_output}')
        current_file += 1
def get_api_endpoint(limit: int, current_file: int) -> str:
    """Build the paged GeoJSON request URL for Street Flooding (SJ) complaints.

    Args:
        limit (int): Number of rows per page, sent as ``$limit``.
        current_file (int): Zero-based page number; the row offset sent as
            ``$offset`` is ``limit * current_file``.

    Returns:
        str: The request URL, including ``$limit``, ``$offset`` and
            ``$order=unique_key``.
    """
    base_url = 'https://data.cityofnewyork.us/resource/erm2-nwe9.geojson'
    query_parts = [
        'descriptor=Street%20Flooding%20(SJ)',
        f'$limit={limit}',
        f'$offset={limit * current_file}',
        '$order=unique_key',
    ]
    return base_url + '?' + '&'.join(query_parts)
def get_output_file_name(output_prefix: str, limit: int, current_file: int, file_type: str) -> str:
    """Build the output file name for one downloaded page.

    The name embeds the one-based row range the page nominally covers, each
    bound zero-padded to at least six digits. The upper bound is computed
    from ``limit``, not from the number of rows actually received.

    Args:
        output_prefix (str): Path and base name placed before the row range.
        limit (int): Number of rows per page.
        current_file (int): Zero-based page number.
        file_type (str): File extension, without the leading dot.

    Returns:
        str: ``{output_prefix}_{start}_{end}.{file_type}``.
    """
    start_num = limit * current_file + 1
    end_num = limit * (current_file + 1)
    return f'{output_prefix}_{start_num:06d}_{end_num:06d}.{file_type}'
%%script echo skip
# Preview the name of the first output file without downloading anything.
output_prefix = 'data/street_flood-complaints'
file_type = 'geojson'
current_file = 0
limit = file_size = 10000
get_output_file_name(output_prefix, limit, current_file, file_type)
skip
%%script echo "skip: refactor to check if already downloaded"
# Download every page and save each one as a GeoJSON file.
get_street_flooding_data(file_type='geojson')
skip: refactor to check if already downloaded
# Collect the downloaded GeoJSON pages. os.listdir returns names in arbitrary
# order, so sort them: the zero-padded row ranges in the file names make the
# lexicographic order match the download order.
geojson_file_list = sorted(
    'data/' + geojson_file
    for geojson_file in os.listdir('data/')
    if geojson_file.endswith('.geojson')
)
# print(geojson_file_list)

# Read every page and stack the pages into a single GeoDataFrame.
geojson_df_list = [
    gpd.read_file(geojson_file, driver='GeoJSON')
    for geojson_file in geojson_file_list
]
street_flooding_gdf = pd.concat(geojson_df_list)
Save .csv data locally#
%%script echo skip
# Read the CSV endpoint into a DataFrame, then write a local copy of it.
street_flooding_cdf = pd.read_csv(NYC_OPEN_DATA_311_API_CSV)
# output_prefix is defined both with and without a trailing dot in this
# notebook; normalise it so the saved file always ends in exactly '.csv'.
street_flooding_cdf.to_csv(output_prefix.rstrip('.') + '.csv')
skip
View Street Flooding Metadata#
# Print the column names, non-null counts and dtypes of the combined data.
street_flooding_gdf.info()
<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 35006 entries, 0 to 5005
Data columns (total 45 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 location_state 33999 non-null object
1 facility_type 23296 non-null object
2 intersection_street_2 11289 non-null object
3 city 34141 non-null object
4 location_zip 33999 non-null object
5 park_borough 35002 non-null object
6 latitude 33999 non-null object
7 road_ramp 0 non-null float64
8 created_date 35006 non-null datetime64[ns]
9 agency 35006 non-null object
10 park_facility_name 35006 non-null object
11 location_address 33999 non-null object
12 agency_name 35006 non-null object
13 descriptor 35006 non-null object
14 bbl 21669 non-null object
15 location_city 33999 non-null object
16 open_data_channel_type 35006 non-null object
17 cross_street_2 30035 non-null object
18 bridge_highway_direction 0 non-null float64
19 longitude 33999 non-null object
20 bridge_highway_segment 0 non-null float64
21 street_name 23769 non-null object
22 incident_address 23769 non-null object
23 address_type 35000 non-null object
24 incident_zip 34137 non-null object
25 unique_key 35006 non-null object
26 complaint_type 35006 non-null object
27 y_coordinate_state_plane 33999 non-null object
28 status 35006 non-null object
29 bridge_highway_name 0 non-null float64
30 location_type 0 non-null float64
31 due_date 1 non-null datetime64[ns]
32 taxi_company_borough 0 non-null float64
33 taxi_pick_up_location 0 non-null float64
34 x_coordinate_state_plane 33999 non-null object
35 resolution_description 34989 non-null object
36 community_board 35002 non-null object
37 resolution_action_updated_date 34997 non-null datetime64[ns]
38 intersection_street_1 11289 non-null object
39 closed_date 34989 non-null datetime64[ns]
40 vehicle_type 0 non-null float64
41 cross_street_1 30045 non-null object
42 borough 35002 non-null object
43 landmark 0 non-null float64
44 geometry 33999 non-null geometry
dtypes: datetime64[ns](4), float64(9), geometry(1), object(31)
memory usage: 12.3+ MB
Convert datetime64 data type to string#
# Render the timestamp columns as 'YYYY-MM-DD HH:MM:SS' strings.
for date_column in ('created_date', 'resolution_action_updated_date', 'closed_date'):
    street_flooding_gdf[date_column] = street_flooding_gdf[date_column].dt.strftime('%Y-%m-%d %H:%M:%S')
Set unique_key as Index#
# Replace the existing index with the 'unique_key' column, modifying the frame in place.
street_flooding_gdf.set_index('unique_key', inplace=True)
Remove Rows With Missing geometry#
# Drop, in place, every row whose 'geometry' value is missing.
street_flooding_gdf.dropna(subset=['geometry'], inplace=True)
Preview Street Flooding Data#
# Show the first ten rows, restricted to four columns.
street_flooding_gdf[['created_date', 'borough', 'bbl', 'geometry']].head(10)
| created_date | borough | bbl | geometry | |
|---|---|---|---|---|
| unique_key | ||||
| 15639934 | 2010-01-02 08:26:00 | BROOKLYN | 3089000064 | POINT (-73.92178 40.58778) |
| 15640572 | 2010-01-02 12:00:00 | STATEN ISLAND | NaN | POINT (-74.14329 40.63866) |
| 15640664 | 2010-01-02 17:45:00 | QUEENS | 4120050012 | POINT (-73.79530 40.68140) |
| 15655327 | 2010-01-04 16:47:00 | QUEENS | 4106210008 | POINT (-73.73843 40.72006) |
| 15668560 | 2010-01-05 10:37:00 | BROOKLYN | 3086550021 | POINT (-73.90969 40.61250) |
| 15674300 | 2010-01-06 19:26:00 | BROOKLYN | 3029270015 | POINT (-73.93297 40.71584) |
| 15674896 | 2010-01-06 08:24:00 | QUEENS | 4119960122 | POINT (-73.80255 40.67925) |
| 15674924 | 2010-01-06 09:17:00 | STATEN ISLAND | 5040740044 | POINT (-74.10646 40.55866) |
| 15675505 | 2010-01-06 06:00:00 | QUEENS | 4030030044 | POINT (-73.87694 40.71804) |
| 15683503 | 2010-01-07 10:16:00 | STATEN ISLAND | 5014850078 | POINT (-74.14943 40.61979) |
View on Map#
# Re-assign the geometry column from the frame's active geometry.
street_flooding_gdf['geometry'] = street_flooding_gdf.geometry

# Columns carried onto the interactive map.
popup_columns = [
    'geometry', 'created_date', 'incident_address', 'city',
    'incident_zip', 'borough', 'bbl', 'status',
]

# Plot the selected columns on an interactive map, coloured by borough.
street_flooding_gdf[popup_columns].explore(column='borough')
Make this Notebook Trusted to load map: File -> Trust Notebook